Overview

Merging the background and presence data into one giant dataset, and time-sliced subsets.

A note to anyone who might happen to stumble across this… I am a beginner in R and have had no exposure to similar languages. I don’t know what I’m doing. The code herein is unlikely to be elegant and there are probably more efficient ways of running the code.

Built with `r getRversion()`.

Package dependencies

You can load them using the following code, which uses a function called ipak (https://gist.github.com/stevenworthington/3178163). Note that this function checks whether the packages are installed first.

# Packages this notebook needs; plyr supplies rbind.fill() used for the merge.
packages <- c("plyr")
# ipak() is defined in a project helper script. The path is relative to the
# notebook's directory, like the "../output/..." data paths used below.
source("../src/ipak.R")
ipak(packages)
plyr 
TRUE 

read in the presence and background

# Presence records (one CSV) and background points (another CSV); both files
# carry a header row.
presencemerging <- read.csv(
  file = "../output/bio/presence_points_without_envdata_relooped_glbathy_nodup.csv",
  header = TRUE
)
backgroundmerging <- read.csv(
  file = "../output/bio/background_complete_obs_cels_globot_cellid_xyzt.csv",
  header = TRUE
)

now check

# Print the presence column names so they can be compared with the background
names(presencemerging)
 [1] "cell_id"                "year"                   "month"                  "depthlayerno"           "id"                     "decimalLatitude"        "decimalLongitude"      
 [8] "datecollected"          "institutioncode"        "individualcount"        "depth"                  "resname"                "originalscientificname" "collectioncode"        
[15] "day"                    "occurrence"             "nafo_zone"              "gear"                   "longitude_meters"       "latitude_meters"        "amo_sample"            
[22] "amo_prev"               "amo_winter"             "depth_layer"            "bottom_depth"           "total_cell_obs"         "yymm_cell_obs"          "chl_surface"           
[29] "chl_depth"              "mlp_surface"            "o2_surface"             "o2_depth"               "salinity_surface"       "salinity_depth"         "ssh_surface"           
[36] "temp_surface"           "temp_depth"             "nao_sample"             "nao_prev"               "nao_winter"             "XXtotal_cell_obs_xyzt"  "temp_celsius_depth"    
[43] "temp_celsius_surface"   "longitude_meters.1"     "latitude_meters.1"      "bottom_depth_glorys"    "longitude_meters.2"     "latitude_meters.2"      "cell_id_3d"            
[50] "cell_id_xyzt"           "total_cell_obs_xyzt"   
# Print the background column names so they can be compared with the presence
names(backgroundmerging)
 [1] "cell_id"                "year"                   "month"                  "depthlayerno"           "X"                      "longitude_meters"       "latitude_meters"       
 [8] "decimalLongitude"       "decimalLatitude"        "nafo_zone"              "id"                     "originalscientificname" "amo_sample"             "amo_prev"              
[15] "amo_winter"             "nao_sample"             "nao_prev"               "nao_winter"             "depth_layer"            "bottom_depth"           "total_cell_obs_xy"     
[22] "total_cell_obs_xyt"     "total_cell_obs_xyzt"    "temp_depth"             "temp_surface"           "salinity_depth"         "salinity_surface"       "chl_depth"             
[29] "chl_surface"            "o2_depth"               "o2_surface"             "mlp_surface"            "ssh_surface"            "longitude_meters.1"     "latitude_meters.1"     
[36] "optional"               "occurrence"             "temp_celsius_depth"     "temp_celsius_surface"   "bottom_depth_glorys"    "longitude_meters.2"     "latitude_meters.2"     
[43] "optional.1"             "cell_id_3d"             "cell_id_xyzt"          

OK, so the two data frames need a spot of cleaning.

# Remove the duplicated coordinate columns and other leftovers from both data
# frames before they are merged.
#
# Columns are dropped by name via setdiff() instead of
# subset(select = -c(...)): the latter evaluates the bare column names and
# stops with "object 'longitude_meters.1' not found" when this chunk is
# re-run after the columns have already been removed. With setdiff() a
# repeat run is simply a no-op.
presence_drop <- c(
  "longitude_meters.1", "latitude_meters.1",
  "longitude_meters.2", "latitude_meters.2",
  "XXtotal_cell_obs_xyzt"
)
presencemerging <- presencemerging[setdiff(names(presencemerging), presence_drop)]

# The background table carries the same duplicate coordinates plus the
# X, optional and optional.1 columns, none of which exist in the presence data.
background_drop <- c(
  "longitude_meters.1", "latitude_meters.1",
  "longitude_meters.2", "latitude_meters.2",
  "X", "optional", "optional.1"
)
backgroundmerging <- backgroundmerging[setdiff(names(backgroundmerging), background_drop)]

for consistency, rename a couple of presencemerging columns

# Give the presence observation-count columns the names used in the
# background table (total_cell_obs_xy and total_cell_obs_xyt).
names(presencemerging) <- replace(
  names(presencemerging),
  names(presencemerging) == "total_cell_obs",
  "total_cell_obs_xy"
)
names(presencemerging) <- replace(
  names(presencemerging),
  names(presencemerging) == "yymm_cell_obs",
  "total_cell_obs_xyt"
)

now merge the two datasets

# Stack the presence rows on top of the background rows. rbind.fill() comes
# from plyr and fills columns present in only one of the inputs with NA.
presab <- rbind.fill(
  presencemerging,
  backgroundmerging
)

# Save the combined table, then preview its first rows.
write.csv(x = presab, file = "../output/bio/presab.csv", row.names = FALSE)
head(presab, n = 6L)

lovely jubley

LS0tDQp0aXRsZTogIm1lcmdpbmdfZGF0YXNldHMiDQphdXRob3I6ICJTYW1hbnRoYSBBbmRyZXdzIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KIyBPdmVydmlldw0KbWVyZ2luZyBiYWNrZ3JvdW5kIGFuZCBwcmVzZW5jZSBkYXRhIGludG8gb25lIGdpYW50IGRhdGFzZXQsIGFuZCB0aW1lc2xpY2VkIHN1YnNldHMNCg0KDQpBIG5vdGUgdG8gYW55b25lIHdobyBtaWdodCBoYXBwZW4gdG8gc3R1bWJsZSBhY3Jvc3MgdGhpcy4uLiBJIGFtIGEgYmVnaW5uZXIgaW4gUiBhbmQgaGF2ZSBoYWQgbm8gZXhwb3N1cmUgdG8gc2ltaWxhciBsYW5ndWFnZXMuIEkgZG9uJ3Qga25vdyB3aGF0IEknbSBkb2luZy4gVGhlIGNvZGUgaGVyZWluIGlzIHVubGlrZWx5IHRvIGJlIGVsZWdhbnQgYW5kIHRoZXJlIGFyZSBwcm9iYWJseSBtb3JlIGVmZmljaWVudCB3YXlzIG9mIHJ1bm5pbmcgdGhlIGNvZGUuDQoNCkJ1aWx0IHdpdGggJ3IgZ2V0UnZlcnNpb24oKScuDQoNCiMgUGFja2FnZSBkZXBlbmRlbmNpZXMNCllvdSBjYW4gbG9hZCB0aGVtIHVzaW5nIHRoZSBmb2xsb3dpbmcgY29kZSB3aGljaCB1c2VzIGEgZnVuY3Rpb24gY2FsbGVkIFtpcGFrXShodHRwczovL2dpc3QuZ2l0aHViLmNvbS9zdGV2ZW53b3J0aGluZ3Rvbi8zMTc4MTYzKS4gDQpOb3RlIHRoaXMgZnVuY3Rpb24gY2hlY2tzIHRvIHNlZSBpZiB0aGUgcGFja2FnZXMgYXJlIGluc3RhbGxlZCBmaXJzdC4NCmBgYHtyIHByZS1pbnN0YWxsIHBhY2thZ2VzLCBtZXNzYWdlPUZBTFNFfQ0KcGFja2FnZXMgPC0gYygicGx5ciIpIA0Kc291cmNlKCIuLi9zcmMvaXBhay5SIikNCmlwYWsocGFja2FnZXMpDQpgYGANCg0KcmVhZCBpbiB0aGUgcHJlc2VuY2UgYW5kIGJhY2tncm91bmQNCmBgYHtyfQ0KcHJlc2VuY2VtZXJnaW5nIDwtIHJlYWQuY3N2KCIuLi9vdXRwdXQvYmlvL3ByZXNlbmNlX3BvaW50c193aXRob3V0X2VudmRhdGFfcmVsb29wZWRfZ2xiYXRoeV9ub2R1cC5jc3YiLCBoZWFkZXIgPSBUUlVFKQ0KYmFja2dyb3VuZG1lcmdpbmcgPC0gcmVhZC5jc3YoIi4uL291dHB1dC9iaW8vYmFja2dyb3VuZF9jb21wbGV0ZV9vYnNfY2Vsc19nbG9ib3RfY2VsbGlkX3h5enQuY3N2IiwgaGVhZGVyID0gVFJVRSkNCmBgYA0KDQpub3cgY2hlY2sNCmBgYHtyfQ0KY29sbmFtZXMocHJlc2VuY2VtZXJnaW5nKQ0KYGBgDQoNCmBgYHtyfQ0KY29sbmFtZXMoYmFja2dyb3VuZG1lcmdpbmcpDQpgYGANCg0Kb2sgc28gdGhlIHR3byBkYXRhZnJhbWVkIG5lZWQgYSBzcG90IG9mIGNsZWFuaW5nDQoNCmBgYHtyfQ0KcHJlc2VuY2VtZXJnaW5nIDwtIHN1YnNldChwcmVzZW5jZW1lcmdpbmcsIHNlbGVjdCA9IC1jKGxvbmdpdHVkZV9tZXRlcnMuMSwgbGF0aXR1ZGVfbWV0ZXJzLjEsIGxvbmdpdHVkZV9tZXRlcnMuMiwgbGF0aXR1ZGVfbWV0ZXJzLjIsIFhYdG90YWxfY2VsbF9vYnNfeHl6dCkpDQpiYWNrZ3JvdW5kbWVyZ2luZyA8LSBzdWJzZXQoYmFja2dyb3VuZG1lcmdpbmcsIHNlbGVjdCA9IC1jKGxvbmdp
dHVkZV9tZXRlcnMuMSwgbGF0aXR1ZGVfbWV0ZXJzLjEsIGxvbmdpdHVkZV9tZXRlcnMuMiwgbGF0aXR1ZGVfbWV0ZXJzLjIsIFgsIG9wdGlvbmFsLCBvcHRpb25hbC4xKSkNCmBgYA0KDQpmb3IgY29uc2lzdGVuY3ksIHJlbmFtZSBhIGNvdXBsZSBvZiBwcmVzZW5jZW1lcmdpbmcgY29sdW1ucw0KYGBge3J9DQpuYW1lcyhwcmVzZW5jZW1lcmdpbmcpW25hbWVzKHByZXNlbmNlbWVyZ2luZyk9PSJ0b3RhbF9jZWxsX29icyJdIDwtICJ0b3RhbF9jZWxsX29ic194eSINCm5hbWVzKHByZXNlbmNlbWVyZ2luZylbbmFtZXMocHJlc2VuY2VtZXJnaW5nKT09Inl5bW1fY2VsbF9vYnMiXSA8LSAidG90YWxfY2VsbF9vYnNfeHl0Ig0KYGBgDQoNCm5vdyBtZXJnZSB0aGUgdHdvIGRhdGFzZXRzDQpgYGB7cn0NCnByZXNhYiA8LSByYmluZC5maWxsKHByZXNlbmNlbWVyZ2luZywgYmFja2dyb3VuZG1lcmdpbmcpDQp3cml0ZS5jc3YocHJlc2FiLCAiLi4vb3V0cHV0L2Jpby9wcmVzYWIuY3N2Iiwgcm93Lm5hbWVzID0gRkFMU0UpDQpoZWFkKHByZXNhYikNCmBgYA0KDQpsb3ZlbHkganVibGV5DQoNCg0KDQoNCg0KDQo=